In [2]:
import geoplot as gplt
import geopandas as gpd
import geoplot.crs as gcrs
import imageio
import pandas as pd
import pathlib
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import mapclassify as mc
import numpy as np

%matplotlib inline

pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
In [3]:
usa = gpd.read_file("maps/cb_2018_us_state_20m.shp")
usa.head()
Out[3]:
STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND AWATER geometry
0 24 01714934 0400000US24 24 MD Maryland 00 25151100280 6979966958 MULTIPOLYGON (((-76.04621 38.02553, -76.00734 ...
1 19 01779785 0400000US19 19 IA Iowa 00 144661267977 1084180812 POLYGON ((-96.62187 42.77925, -96.57794 42.827...
2 10 01779781 0400000US10 10 DE Delaware 00 5045925646 1399985648 POLYGON ((-75.77379 39.72220, -75.75323 39.757...
3 39 01085497 0400000US39 39 OH Ohio 00 105828882568 10268850702 MULTIPOLYGON (((-82.86334 41.69369, -82.82572 ...
4 42 01779798 0400000US42 42 PA Pennsylvania 00 115884442321 3394589990 POLYGON ((-80.51989 40.90666, -80.51964 40.987...
In [4]:
state_pop = pd.read_csv("data/nst-est2018-alldata.csv")
state_pop.head()
Out[4]:
SUMLEV REGION DIVISION STATE NAME CENSUS2010POP ESTIMATESBASE2010 POPESTIMATE2010 POPESTIMATE2011 POPESTIMATE2012 ... RDOMESTICMIG2017 RDOMESTICMIG2018 RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014 RNETMIG2015 RNETMIG2016 RNETMIG2017 RNETMIG2018
0 10 0 0 0 United States 308745538 308758105 309326085 311580009 313874218 ... 0.000000 0.000000 2.553948 2.746049 2.701727 2.988276 3.328598 3.321549 2.941086 3.001086
1 20 1 0 0 Northeast Region 55317240 55318430 55380645 55600532 55776729 ... -5.651919 -5.222289 0.845134 0.040762 -0.397011 -0.923951 -2.011735 -2.430459 -1.801582 -1.127222
2 20 2 0 0 Midwest Region 66927001 66929743 66974749 67152631 67336937 ... -2.370672 -2.301663 -1.043009 -0.896575 0.042505 -0.715656 -1.357662 -1.226811 -0.519621 -0.431833
3 20 3 0 0 South Region 114555744 114563045 114867066 116039399 117271075 ... 2.963135 2.779373 5.379667 5.836112 5.290067 6.206402 7.328494 7.225046 6.252425 6.148925
4 20 4 0 0 West Region 71945553 71946887 72103625 72787447 73489477 ... 1.478565 1.350094 2.689358 3.226360 3.343874 4.148127 5.127995 5.372314 4.164981 3.965769

5 rows × 136 columns

In [5]:
pop_states = usa.merge(state_pop, left_on="NAME", right_on="NAME")
pop_states.head()
Out[5]:
STATEFP STATENS AFFGEOID GEOID STUSPS NAME LSAD ALAND AWATER geometry ... RDOMESTICMIG2017 RDOMESTICMIG2018 RNETMIG2011 RNETMIG2012 RNETMIG2013 RNETMIG2014 RNETMIG2015 RNETMIG2016 RNETMIG2017 RNETMIG2018
0 24 01714934 0400000US24 24 MD Maryland 00 25151100280 6979966958 MULTIPOLYGON (((-76.04621 38.02553, -76.00734 ... ... -3.991992 -4.063440 3.600733 3.315179 1.775836 1.160255 0.315784 -1.352135 -0.208652 -0.322019
1 19 01779785 0400000US19 19 IA Iowa 00 144661267977 1084180812 POLYGON ((-96.62187 42.77925, -96.57794 42.827... ... -1.278002 -0.916222 1.843768 -0.120479 2.359797 1.925327 0.702299 0.036461 0.573348 0.934001
2 10 01779781 0400000US10 10 DE Delaware 00 5045925646 1399985648 POLYGON ((-75.77379 39.72220, -75.75323 39.757... ... 4.689728 7.127976 4.801565 4.910826 6.209397 6.493793 6.755571 5.516683 6.460703 9.019623
3 39 01085497 0400000US39 39 OH Ohio 00 105828882568 10268850702 MULTIPOLYGON (((-82.86334 41.69369, -82.82572 ... ... -0.698138 -1.040184 -1.999325 -1.820990 0.365060 -0.030544 -0.494039 -0.313424 0.968963 0.716636
4 42 01779798 0400000US42 42 PA Pennsylvania 00 115884442321 3394589990 POLYGON ((-80.51989 40.90666, -80.51964 40.987... ... -2.144836 -1.598828 1.516750 0.547598 -0.023724 -0.133225 -0.921843 -0.952470 0.302260 1.165270

5 rows × 145 columns

In [6]:
pop_states[pop_states.NAME=="California"].plot()
Out[6]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe7de6c3518>
In [7]:
path = gplt.datasets.get_path("contiguous_usa")
contiguous_usa = gpd.read_file(path)
contiguous_usa.head()
Out[7]:
state adm1_code population geometry
0 Minnesota USA-3514 5303925 POLYGON ((-89.59941 48.01027, -89.48888 48.013...
1 Montana USA-3515 989415 POLYGON ((-111.19419 44.56116, -111.29155 44.7...
2 North Dakota USA-3516 672591 POLYGON ((-96.60136 46.35136, -96.53891 46.199...
3 Idaho USA-3518 1567582 POLYGON ((-111.04973 44.48816, -111.05025 42.0...
4 Washington USA-3519 6724540 POLYGON ((-116.99807 46.33017, -116.90653 46.1...
In [8]:
gplt.polyplot(contiguous_usa)
Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe7dc599908>
In [9]:
path = gplt.datasets.get_path("usa_cities")
usa_cities = gpd.read_file(path)
usa_cities.head()
Out[9]:
id POP_2010 ELEV_IN_FT STATE geometry
0 53 40888.0 1611.0 ND POINT (-101.29627 48.23251)
1 101 52838.0 830.0 ND POINT (-97.03285 47.92526)
2 153 15427.0 1407.0 ND POINT (-98.70844 46.91054)
3 177 105549.0 902.0 ND POINT (-96.78980 46.87719)
4 192 17787.0 2411.0 ND POINT (-102.78962 46.87918)
In [10]:
continental_usa_cities = usa_cities.query('STATE not in ["HI", "AK", "PR"]')
gplt.pointplot(continental_usa_cities)
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe7de64fc50>
In [11]:
ax = gplt.polyplot(contiguous_usa)
gplt.pointplot(continental_usa_cities, ax=ax)
Out[11]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fe7dba0c278>
In [12]:
ax = gplt.polyplot(contiguous_usa, projection=gcrs.AlbersEqualArea())
gplt.pointplot(continental_usa_cities, ax=ax)
Out[12]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7db997198>
In [13]:
ax = gplt.polyplot(contiguous_usa, projection=gcrs.AlbersEqualArea())

gplt.pointplot(
    continental_usa_cities,
    ax=ax,
    hue="ELEV_IN_FT",
    legend=True
)
Out[13]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7db9227b8>
In [14]:
ax = gplt.polyplot(
    contiguous_usa, 
    edgecolor="white",
    facecolor="lightgray",
    figsize=(12, 8),
    projection=gcrs.AlbersEqualArea()
)

gplt.pointplot(
    continental_usa_cities,
    ax=ax,
    hue="ELEV_IN_FT",
    cmap="Blues",
    scheme="quantiles",
    scale="ELEV_IN_FT",
    limits=(1, 10),
    legend=True,
    legend_var="scale",
    legend_kwargs={"frameon": False},
    legend_values=[-110, 1750, 3600, 5500, 7400],
    legend_labels=["-110 feet", "1750 feet", "3600 feet", "5500 feet", "7400 feet"]
)

ax.set_title("Cities in the continental US, by elevation", fontsize=16)
Out[14]:
Text(0.5, 1.0, 'Cities in the continental US, by elevation')
In [15]:
ax = gplt.polyplot(contiguous_usa, projection=gcrs.AlbersEqualArea())

gplt.choropleth(
    contiguous_usa,
    hue="population",
    edgecolor="white",
    linewidth=1,
    cmap="Greens",
    legend=True,
    scheme="FisherJenks",
    legend_labels=[
        "<3 million", "3-6.7 million", "6.7-12.8 million",
        "12.8-25 million", "25-37 million"
    ],
    projection=gcrs.AlbersEqualArea(),
    ax=ax
)
Out[15]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7db7d5908>
In [16]:
boroughs = gpd.read_file(gplt.datasets.get_path("nyc_boroughs"))
collisions = gpd.read_file(gplt.datasets.get_path("nyc_collision_factors"))

ax = gplt.polyplot(boroughs, projection=gcrs.AlbersEqualArea())
gplt.kdeplot(collisions, cmap="Reds", shade=True, clip=boroughs, ax=ax)
Out[16]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7d951ce80>
In [17]:
ax = gplt.polyplot(contiguous_usa, projection=gcrs.AlbersEqualArea())

gplt.kdeplot(
    continental_usa_cities, 
    cmap="Reds", 
    shade=True, 
    clip=contiguous_usa, 
    ax=ax
)
Out[17]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7d8f934a8>
In [18]:
obesity = pd.read_csv(gplt.datasets.get_path("obesity_by_state"), sep="\t")
obesity.head()
Out[18]:
State Percent
0 Alabama 32.4
1 Missouri 30.4
2 Alaska 28.4
3 Montana 24.6
4 Arizona 26.8
In [19]:
geo_obesity = contiguous_usa.set_index("state").join(obesity.set_index("State"))
geo_obesity.head()
Out[19]:
adm1_code population geometry Percent
state
Minnesota USA-3514 5303925 POLYGON ((-89.59941 48.01027, -89.48888 48.013... 25.5
Montana USA-3515 989415 POLYGON ((-111.19419 44.56116, -111.29155 44.7... 24.6
North Dakota USA-3516 672591 POLYGON ((-96.60136 46.35136, -96.53891 46.199... 31.0
Idaho USA-3518 1567582 POLYGON ((-111.04973 44.48816, -111.05025 42.0... 29.6
Washington USA-3519 6724540 POLYGON ((-116.99807 46.33017, -116.90653 46.1... 27.2
In [20]:
gplt.cartogram(
    geo_obesity,
    scale="Percent",
    projection=gcrs.AlbersEqualArea()
)
Out[20]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7d8e3ae80>
In [21]:
scheme = mc.Quantiles(continental_usa_cities["ELEV_IN_FT"], k=10)
scheme
Out[21]:
Quantiles                 

     Interval        Count
--------------------------
[-112.00,   26.00] |   382
(  26.00,   72.00] |   351
(  72.00,  157.00] |   362
( 157.00,  328.00] |   353
( 328.00,  528.00] |   366
( 528.00,  646.00] |   359
( 646.00,  778.00] |   359
( 778.00,  948.00] |   363
( 948.00, 1310.50] |   359
(1310.50, 7369.00] |   362
In [22]:
gplt.pointplot(
    continental_usa_cities,
    projection=gcrs.AlbersEqualArea(),
    hue="ELEV_IN_FT",
    scheme=scheme,
    cmap="inferno_r",
    legend=True
)
Out[22]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7d8d3a9e8>
In [23]:
import warnings

warnings.filterwarnings("ignore", "GeoSeries.isna", UserWarning)
In [24]:
melbourne = gpd.read_file(gplt.datasets.get_path("melbourne"))
df = gpd.read_file(gplt.datasets.get_path("melbourne_schools"))
melbourne_primary_schools = df.query('School_Type == "Primary"')

ax = gplt.voronoi(
    melbourne_primary_schools,
    clip=melbourne,
    linewidth=0.5,
    edgecolor="white",
    projection=gcrs.Mercator()
)

gplt.polyplot(
    melbourne, 
    edgecolor="None", 
    facecolor="lightgray",
    ax=ax
)

gplt.pointplot(
    melbourne_primary_schools,
    color="black",
    ax=ax,
    s=1,
    extent=melbourne.total_bounds
)

plt.title("Primary Schools in Greater Melbourne, 2018")
/home/ceteri/venv/lib/python3.6/site-packages/geoplot/geoplot.py:625: FutureWarning:     You are passing non-geometry data to the GeoSeries constructor. Currently,
    it falls back to returning a pandas Series. But in the future, we will start
    to raise a TypeError instead.
  extent = gpd.GeoSeries(self.extent) if self.extent is not None else None
Out[24]:
Text(0.5, 1.0, 'Primary Schools in Greater Melbourne, 2018')
In [25]:
proj = gplt.crs.AlbersEqualArea(
    central_longitude=-98,
    central_latitude=39.5
)

ax = gplt.voronoi(
    continental_usa_cities,
    hue="ELEV_IN_FT",
    clip=contiguous_usa,
    projection=proj,
    cmap="Reds",
    legend=True,
    edgecolor="white",
    linewidth=0.01
)

gplt.polyplot(
    contiguous_usa,
    ax=ax,
    extent=contiguous_usa.total_bounds,
    edgecolor="black",
    linewidth=1,
    zorder=1
)
/home/ceteri/venv/lib/python3.6/site-packages/geoplot/geoplot.py:625: FutureWarning:     You are passing non-geometry data to the GeoSeries constructor. Currently,
    it falls back to returning a pandas Series. But in the future, we will start
    to raise a TypeError instead.
  extent = gpd.GeoSeries(self.extent) if self.extent is not None else None
Out[25]:
<cartopy.mpl.geoaxes.GeoAxesSubplot at 0x7fe7d9814470>
In [26]:
ihme = pd.read_csv("data/Hospitalization_all_locs.csv")
ihme.head()
Out[26]:
location_name date allbed_mean allbed_lower allbed_upper ICUbed_mean ICUbed_lower ICUbed_upper InvVen_mean InvVen_lower ... newICU_upper totdea_mean totdea_lower totdea_upper bedover_mean bedover_lower bedover_upper icuover_mean icuover_lower icuover_upper
0 Abruzzo 2020-01-03 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Abruzzo 2020-01-04 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Abruzzo 2020-01-05 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 Abruzzo 2020-01-06 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 Abruzzo 2020-01-07 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 29 columns

In [27]:
is_earthday = ihme["date"]=="2020-04-22"
is_earthday.head()
Out[27]:
0    False
1    False
2    False
3    False
4    False
Name: date, dtype: bool
In [28]:
cv19 = contiguous_usa.merge(ihme[is_earthday], left_on="state", right_on="location_name")
cv19.head()
Out[28]:
state adm1_code population geometry location_name date allbed_mean allbed_lower allbed_upper ICUbed_mean ... newICU_upper totdea_mean totdea_lower totdea_upper bedover_mean bedover_lower bedover_upper icuover_mean icuover_lower icuover_upper
0 Minnesota USA-3514 5303925 POLYGON ((-89.59941 48.01027, -89.48888 48.013... Minnesota 2020-04-22 324.575350 93.19875 1047.148039 94.219099 ... 50.779167 171.975 161.0 205.000 0.0 0.0 0.0 0.0 0.0 0.000000
1 Montana USA-3515 989415 POLYGON ((-111.19419 44.56116, -111.29155 44.7... Montana 2020-04-22 27.096500 5.80000 106.912500 7.215450 ... 3.201250 14.271 13.0 19.000 0.0 0.0 0.0 0.0 0.0 0.000000
2 North Dakota USA-3516 672591 POLYGON ((-96.60136 46.35136, -96.53891 46.199... North Dakota 2020-04-22 99.412673 10.90000 373.926786 28.695143 ... 22.698077 16.726 13.0 28.000 0.0 0.0 0.0 0.0 0.0 26.148214
3 Idaho USA-3518 1567582 POLYGON ((-111.04973 44.48816, -111.05025 42.0... Idaho 2020-04-22 60.246750 18.64875 161.480000 17.521550 ... 5.900000 49.782 48.0 54.000 0.0 0.0 0.0 0.0 0.0 0.000000
4 Washington USA-3519 6724540 POLYGON ((-116.99807 46.33017, -116.90653 46.1... Washington 2020-04-22 463.035901 228.88000 1033.948611 136.228431 ... 37.339444 695.345 685.0 719.025 0.0 0.0 0.0 0.0 0.0 0.000000

5 rows × 33 columns

In [29]:
deaths_per_mil = cv19["deaths_mean"] / cv19["population"] * 1000000.0
cv19["deaths_per_mil"] = deaths_per_mil
In [65]:
ax = gplt.choropleth(
    cv19,
    hue="deaths_per_mil",
    edgecolor="white",
    linewidth=5,
    cmap="Reds",
    alpha = 0.8,
    projection=gcrs.AlbersEqualArea(),
    figsize=(30, 30)
)

ax = gplt.pointplot(
    continental_usa_cities,
    hue="POP_2010",
    cmap="Greens",
    scheme="quantiles",
    scale="POP_2010",
    limits=(3, 50),
    zorder=2, 
    alpha = 0.4,
    ax=ax
)

ax.set_title(
    "COVID-19 deaths/million vs. population, on Earth Day 2020",
    fontsize=36
)
In [211]:
def plot_choropleth (anim_path, date, cv19, cities):
    ax = gplt.choropleth(
        cv19,
        hue="deaths_per_mil",
        edgecolor="white",
        linewidth=5,
        cmap="Reds",
        alpha = 0.8,
        projection=gcrs.AlbersEqualArea(),
        figsize=(30, 30)
    )
    
    ax = gplt.pointplot(
        cities,
        hue="POP_2010",
        cmap="Greens",
        scheme="quantiles",
        scale="POP_2010",
        limits=(3, 50),
        zorder=2, 
        alpha = 0.4,
        ax=ax
    )

    ax.set_title(
        f"COVID-19 deaths/million vs. population on {date}",
        fontsize=36
    )

    file_name = str(anim_path / "{}.png".format(date.replace("-", "")))
    plt.savefig(file_name, bbox_inches="tight", pad_inches=0.1)

    return file_name
In [212]:
date_set = set([])

for d in ihme["date"].tolist():
    if d >= "2020-03-23" and d <= "2020-04-01":
        date_set.add(d)

dates = sorted(list(date_set))
In [213]:
anim_path = pathlib.Path("anim/")
anim_path.mkdir(parents=True, exist_ok=True)

fig = plt.figure()
image_files = []

for date in dates:
    is_earthday = ihme["date"]==date
    cv19 = contiguous_usa.merge(ihme[is_earthday], left_on="state", right_on="location_name")
    
    deaths_per_mil = cv19["deaths_mean"] / cv19["population"] * 1000000.0
    cv19["deaths_per_mil"] = deaths_per_mil

    file_name = plot_choropleth(anim_path, date, cv19, continental_usa_cities)
    image_files.append(file_name)
<Figure size 432x288 with 0 Axes>
In [214]:
images = []

for file_name in image_files:
    images.append(imageio.imread(file_name))

gif_path = "movie.gif"
imageio.mimsave(gif_path, images, fps=2)
In [215]:
from IPython.display import HTML

HTML('<img src="movie.gif" />')
Out[215]:
In [ ]: